import requests
from lxml import etree

# Target: the 58.com (Shenzhen) rental listing page, plus the request headers
# (a desktop-browser User-Agent and a captured session cookie).
# NOTE(review): the cookie below is a hard-coded captured browser session; it
# will presumably expire and should not live in source control — consider
# loading it from configuration instead.
url = "https://sz.58.com/chuzu/"
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
    "cookie": "f=n; commontopbar_new_city_info=4%7C%E6%B7%B1%E5%9C%B3%7Csz; commontopbar_ipcity=shuyang%7C%E6%B2%AD%E9%98%B3%7C0; id58=c5/nfF+sjol8pq7MDA3LAg==; 58tj_uuid=7666a956-cce2-4bed-b9a4-f2f784bbc006; als=0; wmda_uuid=c04c9142f2390c313366e6e032aad65c; wmda_new_uuid=1; wmda_visited_projects=%3B11187958619315; xxzl_deviceid=gFMnBfptKZ%2B485OUft22Lcw587ELWVXHvg3vCfVDhfW4FequzR2ONQRNkK3nCBvZ; f=n; new_uv=3"
}

# Fetch the page. requests applies no timeout by default, so without one a
# stalled connection would block this script forever.
r = requests.get(url=url, headers=headers, timeout=10)

# Force UTF-8 decoding of the body, then parse it into an lxml element tree
# so it can be queried with XPath.
r.encoding = "utf-8"
html = etree.HTML(r.text)

# Save the raw page for offline inspection. The encoding is given explicitly
# because the platform default (e.g. GBK or cp1252 on Windows) may be unable
# to encode the page's Chinese text.
with open("58.html", "w", encoding="utf-8") as f:
    f.write(r.text)

print(html)
# Select every <li> whose class attribute is exactly "house-cell".
li_list = html.xpath("//li[@class='house-cell']")
print(li_list)
